home *** CD-ROM | disk | FTP | other *** search
- /* $Header: /home/panda/pg/bevan/progs/elk/lib/RCS/gnu_regexp.c,v 1.5 91/04/02 14:32:25 bevan Exp $ */
-
- /*+c
- ** DESCRIPTION
- ** Regular Expressions for ELK
- ** These are based on the GNU regular expression code
- ** The code is written in K&R C to maintain compatibility with the rest of ELK
- **
- ** USE:
- ** To avoid name clashes, everything is prefixed by gnu. This will help
- ** if you add a different regexp package, such as Henry Spencer's.
- **
- ** gnu:regexp
- ** A type that represents gnu regular expressions.
- **
- ** (gnu:make-regexp str)
- ** Generate a regulare expression from the given string. Does not return
- ** if there is an error in the string.
- **
- ** (gnu:regexp? obj)
- ** Is the object a GNU regular expression?
- **
- ** (gnu:regexp-exec regexp str start)
- ** Apply the GNU regular expression `regexp' to the string `str' starting
- ** at position `start'. If the match succeeds it returns a gnu:regexp-match
- ** It returns #f otherwise.
- **
- ** gnu:regexp-match
- ** A type that represents regular expression matches.
- **
- ** (gnu:regexp-match? obj)
- ** Is the object a GNU regular expression match.
- **
- ** (gnu:regexp-start regexp-match match-number)
- ** Returns the start position of the match denoted by `match-number'
- ** The whole regexp is 0. Each further number represents positions
- ** enclosed by \\(\\) sections.
- **
- ** (gnu:regexp-end regexp-match match-number)
- ** Returns the end position of the match denoted by `match-number'
- ** See the above.
- **
- ** Note the above produce values such that they can directly be used by
- ** substring
- **
- ** The interface to the regexp code is a mixture of the Emacs style and that to
- ** the regexp package in Python. This has been done so that there is a low
- ** level on which packages to emulate either of the above could be written.
- ** For example one nice feature of Python's regexp is that it returns a list
- ** of the match positions. This can easily be emulated by consing up a list
- ** from a gnu:match
- **
- ** EXAMPLE
- **
- ** (define scheme-define-matcher
- ** (gnu:make-regexp ""^[ \t]*(define[ \t]+(?\\([---A-Za-z0-9:?]+\\)[ \t]*"))
- **
- ** This creates a regexp that will match most of the scheme function names
- ** I use. (Note it doesn't match symbolic ones like (define (* a b) ...))
- **
- ** (define str "(define (some-scheme-function a b) ...)")
- **
- ** (define match (gnu:regexp-exec scheme-define-matcher str 0))
- **
- ** (if (gnu:regexp-match? match)
- ** (let ((name (substring str
- ** (gnu:regexp-start match 1)
- ** (gnu:regexp-end match 1))))
- ** (display name))
- ** (error 'foo "couldn't find a match"))
- **
- ** should output
- **
- ** some-scheme-function
- **
- */
-
-
- #include <scheme.h>
-
- /* Note that a .c file is being included here. This is so that a single .o
- ** file is created rather than one for this file and one for the GNU regex
- ** code. The reason for not wanting two .o files is that I can't think how to
- ** reliably make sure that the GNU regex .o is loaded in before this .o (One
- ** solution would be to add an init section with a Provide in it to it, but I'd
- ** rather not do that)
- */
- #include "gnu_regex.c"
-
- /* Placeholders for the Gnu_Regexp and Gnu_RegexpMatch types. */
- static int T_Gnu_Regexp;
- static int T_Gnu_RegexpMatch;
-
- /* A regular expression is represented as a pointer to
- ** the compiled regular expression pattern.
- ** Is the nothing field really necessary?
- */
- struct S_Gnu_Regexp
- {
- Object nothing;
- struct re_pattern_buffer compiled_pattern;
- Object the_pattern;
- };
-
- /* A the result of a regular expression match is a structure
- ** storing the positions of the match.
- ** Is the nothing field really necessary ?
- */
- struct S_Gnu_RegexpMatch
- {
- Object nothing;
- struct re_registers matches;
- };
-
- /* Convert from a generic ELK pointer to GNUREGEXPs and GNUREGEXPMATCHs. */
- #define GNUREGEXP(obj) ((struct S_Gnu_Regexp *)POINTER(obj))
- #define GNUREGEXPMATCH(obj) ((struct S_Gnu_RegexpMatch *)POINTER(obj))
-
- /* The following set of functions are the standard ones you have to define
- ** inorder to create a type for ELK.
- */
-
- /* Two Gnu_Regexps are eqv? if they share the strings that they are a pattern of */
-
- static int Gnu_Regexp_Eqv(a, b)
- Object a, b;
- {
- return GNUREGEXP(a)->the_pattern == GNUREGEXP(b)->the_pattern;
- }
-
- /* Two Gnu_Regexps are equal? if they represent the same pattern. */
-
- static int Gnu_Regexp_Equal(a, b)
- Object a, b;
- {
- return General_Strcmp(GNUREGEXP(a)->the_pattern, GNUREGEXP(b)->the_pattern, 0) == 0;
- }
-
- static void Gnu_Regexp_Print(regexp, port, raw, depth, len)
- Object regexp, port;
- int raw, depth, len;
- {
- Printf(port, "#[gnu:regexp ");
- Pr_String(port, GNUREGEXP(regexp)->the_pattern, 0);
- Printf(port, "]");
- }
-
- static void Gnu_Regexp_Visit(x, f)
- Object *x;
- void (*f)();
- {
- struct S_Gnu_Regexp *p= GNUREGEXP(*x);
- (*f)(&(p->the_pattern));
- }
-
- static Object P_Gnu_Regexpp(x)
- Object x;
- {
- return TYPE(x) == T_Gnu_Regexp ? True : False;
- }
-
- static int Gnu_RegexpMatch_Eqv(a, b)
- Object a, b;
- {
- return EQ(a, b);
- }
-
- /* There is not good way to tell if two Gnu_RegexpMatches are equal rather than
- ** eqv, so just use the eqv definition.
- */
- static int Gnu_RegexpMatch_Equal(a, b)
- Object a, b;
- {
- return EQ(a, b);
- }
-
- static void Gnu_RegexpMatch_Print(regexpm, port, raw, depth, len)
- Object regexpm, port;
- int raw, depth, len;
- {
- Printf(port, "#[gnu:regexp-match %lu]", POINTER(regexpm));
- }
-
- static Object P_Gnu_RegexpMatchp(x)
- Object x;
- {
- return TYPE(x) == T_Gnu_RegexpMatch ? True : False;
- }
-
- /* End of standard functions */
-
-
- /* Given a string, it compiles it into a regular expression. */
- /* Is the Link + Unlink stuff necessary ? */
-
- static Object P_Make_Gnu_Regexp(str)
- Object str;
- {
- Object regexp;
- char *error;
- GC_Node2;
-
- regexp= Null;
- GC_Link2(str, regexp);
- Check_Type(str, T_String);
- regexp= Alloc_Object(sizeof(struct S_Gnu_Regexp), T_Gnu_Regexp, 0);
- GNUREGEXP(regexp)->nothing= Null;
- GNUREGEXP(regexp)->the_pattern= str;
- /* the value 40 is a arbitrary initial buffer size */
- GNUREGEXP(regexp)->compiled_pattern.allocated= 40;
- GNUREGEXP(regexp)->compiled_pattern.buffer= Safe_Malloc(40);
- GNUREGEXP(regexp)->compiled_pattern.fastmap= NULL;
- GNUREGEXP(regexp)->compiled_pattern.translate= NULL;
- error= re_compile_pattern(STRING(str)->data, STRING(str)->size, &GNUREGEXP(regexp)->compiled_pattern);
- GC_Unlink;
- if (error != (char *)0)
- Primitive_Error(error);
- return regexp;
- }
-
- static Object P_Gnu_Regexp_exec(regexp, str, start)
- Object regexp, str, start;
- {
- int intStart;
- int errorCode;
- Object result;
- GC_Node4;
-
- result= Null;
- GC_Link4(regexp, str, start, result);
- Check_Type(regexp, T_Gnu_Regexp);
- Check_Type(str, T_String);
- result= Alloc_Object(sizeof(struct S_Gnu_RegexpMatch), T_Gnu_RegexpMatch, 0);
- GNUREGEXPMATCH(result)->nothing= Null;
- intStart= Get_Integer(start);
- errorCode= re_match(&GNUREGEXP(regexp)->compiled_pattern, STRING(str)->data,
- STRING(str)->size, intStart, &GNUREGEXPMATCH(result)->matches);
- GC_Unlink;
- if (errorCode == -2)
- Primitive_Error("Gnu_Regexp Stack Overflow");
- return (errorCode == -1) ? False : result;
- }
-
- /* Return the start position of a particular regular expression match. */
-
- static Object P_Gnu_RegexpMatch_Start(regexp_match, match_number)
- Object regexp_match, match_number;
- {
- int int_match_number;
- Check_Type(regexp_match, T_Gnu_RegexpMatch);
- int_match_number= Get_Integer(match_number);
- if (int_match_number >= RE_NREGS || int_match_number < 0)
- Primitive_Error("Match number not in range ~s", match_number);
- return Make_Integer(GNUREGEXPMATCH(regexp_match)->matches.start[int_match_number]);
- }
-
- /* Return the end position of a particular regular expression match. */
-
- static Object P_Gnu_RegexpMatch_End(regexp_match, match_number)
- Object regexp_match, match_number;
- {
- int int_match_number;
- Check_Type(regexp_match, T_Gnu_RegexpMatch);
- int_match_number= Get_Integer(match_number);
- if (int_match_number >= RE_NREGS || int_match_number < 0)
- Primitive_Error("Match number not in range ~s", match_number);
- return Make_Integer(GNUREGEXPMATCH(regexp_match)->matches.end[int_match_number]);
- }
-
-
- /* Initialise the Gnu_Regexp extensions. */
-
- void init_gnu_regexp()
- {
- /* Define the Gnu_Regexp type */
- T_Gnu_Regexp= Define_Type(
- 0,
- "gnu:regexp",
- NOFUNC,
- sizeof(struct S_Gnu_Regexp),
- Gnu_Regexp_Eqv,
- Gnu_Regexp_Equal,
- Gnu_Regexp_Print,
- Gnu_Regexp_Visit
- );
- Define_Primitive(P_Make_Gnu_Regexp, "gnu:make-regexp", 1, 1, EVAL);
- Define_Primitive(P_Gnu_Regexpp, "gnu:regexp?", 1, 1, EVAL);
- Define_Primitive(P_Gnu_Regexp_exec, "gnu:regexp-exec", 3, 3, EVAL);
-
- /* Define the Gnu_RegexpMatch type
- ** Notice that there is no public constructor for this type.
- ** The only way a Gnu_RegexpMatch can be created is as the result
- ** of a Gnu_Regexp-exec call
- */
- T_Gnu_RegexpMatch= Define_Type(
- 0,
- "gnu:regexp-match",
- NOFUNC,
- sizeof(struct S_Gnu_RegexpMatch),
- Gnu_RegexpMatch_Eqv,
- Gnu_RegexpMatch_Equal,
- Gnu_RegexpMatch_Print,
- NOFUNC
- );
- Define_Primitive(P_Gnu_RegexpMatchp, "gnu:regexp-match?", 1, 1, EVAL);
- Define_Primitive(P_Gnu_RegexpMatch_Start, "gnu:regexp-start", 2, 2, EVAL);
- Define_Primitive(P_Gnu_RegexpMatch_End, "gnu:regexp-end", 2, 2, EVAL);
- P_Provide(Intern("gnu_regexp.o"));
- }
-